/*--------------------------------------------------------------------------------
*      							Guinea Final Analysis
Author: 			Jacobus Cilliers
Last date modified:	January 16 2022
Description: 		Replicates Appendix Tables and Figures for the paper "Heterogenous Teacher Effects of Two Incentive Schemes: Evidence from a Low-Income Country"
 
*--------------------------------------------------------------------------------*/
*--------------------------------------------------------------------------------
* Set the critical parameters of the computing environment.
*--------------------------------------------------------------------------------

* Reset the session: drop data, matrices and stored results, and switch off output paging.
clear all
clear matrix
set more off

* Define globals.
* Note: you need to set your own global for the folder path, either by adding a
* branch for your username below or by defining the global root before running this file.
if c(username) == "jacob" { 
	global root "C:\Users\jacob\Dropbox\guinea_teacher_incentives\replication" 
}

* Stop early when no root folder is defined. Otherwise the globals below would
* expand to paths such as "/data/final" and the first -use- would fail with an
* unhelpful file-not-found error (or read from an unintended location).
if `"$root"' == "" {
	display as error "Global root is not set: define the replication folder path before running this file."
	exit 198
}

* Sub-folders used throughout the file: input data, exported tables, exported figures.
global final "$root/data/final"
global tables "$root/output/tables"
global figs "$root/output/figures"
	
*--------------------------------------------------------------------------------
* Note: the computing environment (clear all / clear matrix / set more off) is set once at the top of this file and is not repeated here.
* No data are loaded at this point either: the first analysis section (A.3) opens the teacher dataset with "use ..., clear",
* and each later section that needs the student data or the Ebola data (A.6, A.7, A.9, A.14, A.16) loads and merges them itself.
*--------------------------------------------------------------------------------


*************************************************
*** A.1 and A.2: Manually Added (No analysis) ***
*************************************************

****************************************************
*** A.3 Quality of Implementation (teacher data) ***
****************************************************
*Note: The following code exports all the statistics of Table A.3 into Excel, but we performed manual editing to produce the final table. 
*Open teacher midline dataset
use "$final/teacher_cleaned_2013.dta", clear

*label variable for "Which grades targeted?"
lab var grade_3_4 "Grades 3 and 4"
lab var grade_2 "Only Grade 2"
lab var grade_3 "Only Grade 3"
lab var grade_4 "Only Grade 4"

*We needed to do some recoding to solve logical inconsistencies in the responses. The teachers were allowed to select many options for what should have been mutually exclusive options. 
replace grade_3_4 = 1 if grade_3 == 1 & grade_4 == 1 //it cannot be "only" if they state both grades. 
replace grade_3 = 0 if grade_3_4 == 1 //they cannot say "only grade 3" and "only grade 4" and "both grades"
replace grade_4 = 0 if grade_3_4 == 1 //they cannot say "only grade 3" and "only grade 4" and "both grades"
*Diagnostic count of options selected per teacher (not used in the table below). 
*rowtotal() is the current name of the deprecated rsum() egen function; both treat missing values as zero.
egen responses = rowtotal(grade_3_4 grade_?) //some issues of people still saying "grade 2 only" AND mentioning another grade

*Label variables: "Type of award"
lab var receive_money  "Financial"
lab var receive_gifts   "In-Kind"
lab var receive_certificate   "Certificate"
lab var receive_ceremony "Ceremony"

*Label variables for "Performance metric"
lab var student_test_scores "Student test scores"
lab var inspection_score "Inspection score"
lab var french_evaluated "French evaluated"
lab var math_evaluated "Math evaluated"
lab var science_evaluated "Science evaluated"

*Label variables for "Abs vs Relative Performance"
lab var relative_other_teachers "Relative---Other teachers in school"
lab var relative_other_schools "Relative---Other schools"

*Create one variable for treatment assignment: starts as T1 and is set to 2 where T2 == 1 
gen treat = T1
replace treat = 2 if T2 == 1
*The following code exports all the means, although not in the same format as recorded in the paper. 
*The output path uses a forward slash so that it also resolves on macOS/Linux (Stata accepts "/" on every platform). 
*nottest suppresses the pairwise t-tests, so only group means are exported. 
iebaltab in_program grade_* receive* student_test_scores inspection_score *evaluated relative* *received /// 
		,  grpvar(treat) covariates(i.strat) save("$tables/teacher_knowledge_midline.xls") replace  vce(cluster code_ecole) ///
		 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels     control(0)  nottest

*Open dataset for endline teacher survey
use "$final/teacher_cleaned_2014.dta", clear
*Create one variable for treatment assignment: starts as T1 and is set to 2 where T2 == 1 
gen treat = T1
replace treat = 2 if T2 == 1

*label variable for "Which grades targeted?"
lab var grade_3_4 "Grades 3 and 4"
lab var grade_2 "Only Grade 2"
lab var grade_3 "Only Grade 3"
lab var grade_4 "Only Grade 4"

*Label variables: "Type of award"
lab var receive_money  "Financial"
lab var receive_gifts   "In-Kind"
lab var receive_certificate   "Certificate"
lab var receive_ceremony "Ceremony"

*Label variables for "Performance metric"
lab var student_test_scores "Student test scores"
lab var inspection_score "Inspection score"
lab var french_evaluated "French evaluated"
lab var math_evaluated "Math evaluated"
lab var science_evaluated "Science evaluated"

*Label variables for "Abs vs Relative Performance"
lab var relative_other_teachers "Relative---Other teachers in school"
lab var relative_other_schools "Relative---Other schools"

*Label variables for "Type of Award"
lab var teacher_received "Received"
lab var teacher_received_money "Money"
lab var teacher_received_inkind "In-Kind"
lab var teacher_received_certificate "Certificate"
lab var teacher_received_ceremony "Ceremony"
lab var school_received "School received"
*The following code exports all the means, although not in the same format as recorded in the paper. 
*The output path uses a forward slash so that it also resolves on macOS/Linux (Stata accepts "/" on every platform). 
*NOTE(review): control(1) makes the In-Kind arm the reference group for the t-tests, whereas the group labels name 0 as Control 
*and the midline table uses control(0). Left unchanged because it may be deliberate; confirm against the published table. 
iebaltab in_program grade_* receive* student_test_scores inspection_score *evaluated relative* *received* /// 
		,  grpvar(treat) covariates(i.strat) save("$tables/teacher_knowledge_endline.xls") replace  vce(cluster code_ecole) ///
		 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels     control(1)  
		 
************************************************
*** A.4 and A.5: Manually Added (No analysis) ***
************************************************
		 
	
*************************************************************************************
*** A.6 Log number of students assessed--- by grade and round of data collection  ***
*************************************************************************************
	
*Open student-level data (forward slash so the path also resolves on macOS/Linux)
use "$final/student_all_years.dta", clear

**Create variable for number of students per school for each grade and period. 
*students_per_grade is copied only on rows of the relevant grade and then spread to every row of the same school-period with max(). 
gen tt = students_per_grade if grade == 3 
bys code_ecole period: egen gr3_students_per_school = max(tt)
gen tt1 = students_per_grade if grade == 4
bys code_ecole period: egen gr4_students_per_school = max(tt1)
gen tt2 = students_per_grade if grade == 2
bys code_ecole period: egen gr2_students_per_school = max(tt2)
drop tt tt1 tt2

**Create a variable for the baseline enrollment in each grade (variable is filled for all the observations of a school). 
*Note: these base_* variables are not referenced by the regressions in this section. 
gen tt = gr2_students_per_school if baseline == 1
gen tt1 = gr3_students_per_school if baseline == 1
gen tt2 = students_per_school if baseline == 1
bys code_ecole: egen base_gr2_students_per_school = max(tt)
bys code_ecole: egen base_gr3_students_per_school = max(tt1)
bys code_ecole: egen base_students_per_school = max(tt2)
drop tt tt1 tt2

*The following table estimates the treatment effect on the total number of students assessed at baseline/midline/endline. 		
	*Collapse data to a school-period level (one row per school and period is kept). 
	duplicates drop code_ecole period, force

		
	*Take the log of "students per school per grade"
	forvalues i =2/4 {
		gen ln_gr`i'_students_per_school = ln(gr`i'_students_per_school)
	}
	eststo clear
	*Baseline balance on total number of grade 2 and grade 3 students assessed. 
	*The stored "m" scalar is the control-group mean of the count in levels, while the outcome of the regression is in logs. 
	forvalues i = 2/3 {
	 reg ln_gr`i'_students_per_school T1 T2 i.strat  if baseline == 1, cluster(code_ecole)
		qui test T1 = T2
		local p1 `r(p)' 	
		qui sum gr`i'_students_per_school if treat == 0 & baseline == 1
		local m `r(mean)'
		eststo enrol_p0_gr`i', addscalars(m `m' p1 	`p1')		
	}
	*Midline and endline balance on total number grade 3 and grade 4 students assessed 
	forvalues i = 3/4 {
		forvalues p = 1/2 {
			reg ln_gr`i'_students_per_school T1 T2 i.strat if period == `p', cluster(code_ecole)
			qui test T1 = T2
			local p1 `r(p)' 	
			qui sum gr`i'_students_per_school if treat == 0 & period == `p'
			local m `r(mean)'
			eststo enrol_p`p'_gr`i', addscalars(m `m'  p1 	`p1')		
		}
	}
	*Midline balance for total number of grade 2 students
	qui reg ln_gr2_students_per_school T1 T2 i.strat if period == 1, cluster(code_ecole)
			qui test T1 = T2
			local p1 `r(p)' 	
			qui sum gr2_students_per_school if treat == 0 & period == 1
			local m `r(mean)'
			eststo enrol_p1_gr2, addscalars(m `m'  p1 	`p1')		
	
	
	*Export the seven columns. fmt() now lists one format per statistic (m N r2 p1); 
	*previously the fourth statistic silently reused the last listed format, so the output is unchanged. 
	esttab 	enrol_p0_gr2 enrol_p0_gr3 enrol_p1_gr2 enrol_p1_gr3 enrol_p1_gr4 enrol_p2_gr3 enrol_p2_gr4 ///
				using "$tables/students_assessed_by_round_treat.tex", replace ///
				tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
				stats(m N r2 p1,  fmt(3 0 3 3) labels("Control mean" "Observations" "R-squared" "Test:In-Kind=Recognition")) ///	
				keep(T1 T2) ///
				collabels(none)	 ///
				mgroups("Baseline" "Midline" "Endline" , pattern(1 0 1 0 0 1 0) ///
				prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
				mtitles("Grade 2" "Grade 3"  "Grade 2" "Grade 3"	"Grade 4" "Grade 3"	"Grade 4") ///
				nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								
	


************************************************************************************************************
*** A.7 and Figure A.3 Comparison of distribution of school-grade standard deviations in student learning at midline   ***
************************************************************************************************************
	
*Use student-level data (forward slash so the path also resolves on macOS/Linux)
use "$final/student_all_years.dta", clear

*Some fixes required before reshaping the data long (one observation per student-grade-subject). See the "Analysis.do" file for more detailed description. 

	**The following lines reshape the data long so that there are two observations per individual - one for language and one for math.
	*Rename learning outcomes so can perform a reshape (suffix 1 = language, suffix 2 = math)
	ren langirtscore irt1
	ren mathirtscore irt2
	forvalues i = 2/ 3 { 
		ren base_mean_langirtscore_g`i' base_mean_g`i'_irt1
		ren base_mean_mathirtscore_g`i' base_mean_g`i'_irt2
	}
	ren base_mean_mathirtscore_g3_mis base_g3_mis
	drop base_mean_langirtscore_g3_mis //This and the above variable are exactly the same. 
	ren base_mean_mathirtscore_g2_mis base_g2_mis 
	drop base_mean_langirtscore_g2_mis //This and the above variable are exactly the same. 	
	*Create temporary ID so can reshape
	gen tempid = _n
	*Reshape the data. 
	reshape long irt base_mean_g2_irt base_mean_g3_irt, i(tempid) j(subject)
	

**Impute the missing values with the control-group mean, and flag the imputed rows in a _mis indicator. 		
		foreach var of varlist student_age female HT_age HT_female {
			gen `var'_mis = `var' == .
			qui sum `var' if treat== 0
			replace `var' = `r(mean)' if `var' == .
		}
		*Construct the standard deviation of test scores, within each school-period-grade (both subjects pooled):
	bys code_ecole period grade: egen sd_irt = sd(irt)
	*Collapse data to a school-period-grade level
	duplicates drop code_ecole period grade, force

	*This outputs the numbers that are reported in Table A.7. We only keep the p-value for the combined K-S test. 
	*For each grade at midline: Control vs In-Kind, Control vs Recognition, In-Kind vs Recognition. 
	forvalues g = 3/4 {
		ksmirnov sd_irt if period == 1 & grade == `g' & (treat == 0 | treat == 1), by(T1)
		ksmirnov sd_irt if period == 1 & grade == `g' & (treat == 0 | treat == 2), by(T2)	
		ksmirnov sd_irt if period == 1 & grade == `g' & (treat == 1 | treat == 2), by(T2)	
	}
	
	*Construct the two figures that are in Figure A.3 (one kernel density plot per grade, one line per treatment arm)
	lab var sd_irt "Combined IRT score"

	forvalues g = 3/4 {
		twoway 	kdensity sd_irt if treat == 0 & grade == `g' & period ==1 ///
				|| kdensity sd_irt if treat == 1 & grade == `g' & period ==1 ///
				|| kdensity sd_irt if treat == 2 & grade == `g' & period ==1, legend(label(1 "Control") label(2  "In-Kind") label(3 "Recognition")) ///
				ytitle("Density")
		graph export "$figs/density_grade`g'_variation.png", replace
	}


****************************************************
***   A.8 Teacher Characteristics by Gender      ***
****************************************************		
*NOTE(review): table number A.8 is inferred from the position of this section (between A.7 and A.9); 
*the label A.3 is already used for the Quality of Implementation table. Confirm against the paper. 
*Open baseline teacher dataset 	
use "$final/teachers_2012.dta", clear

*Some minor data cleaning/coding and variable labelling to export the summary statistics. 
gen female = b2_QE  == 2
lab def fem 0 "Male" 1 "Female"
lab val female fem
lab var female "Female"

gen satisfied_teaching = h4_QE 
lab var satisfied_teaching "Satisfied with teaching as a profession"

*Grade taught: the class label appears in both upper and lower case, so both spellings are accepted for CE1 and CE2. 
gen grade = 3 if inlist(classe_QE, "CE1", "ce1")
replace grade = 4 if inlist(classe_QE, "CE2", "ce2")

*Class size: row sum of the four count variables (missing components count as zero); a total of zero is treated as missing. 
*rowtotal() is the current name of the deprecated rsum() egen function. 
egen no_students = rowtotal(c6a1_QE c6a2_QE c6b1_QE c6b2_QE)
replace no_students = . if no_students == 0
lab var no_students "Class size"

gen permanent_teacher = e3 == 1
lab var permanent_teacher "Permanent"

*The three "insufficient/excessive" indicators are set to missing only when both h5_QE and the specific h6 item are missing. 
gen low_recognition = h6b_QE == 1
replace low_recognition = . if h5_QE  == . & h6b_QE == .
lab var low_recognition "Insufficient--- recognition"

gen high_workload = h6d_QE == 1
replace high_workload = . if h5_QE  == . & h6d_QE == .
lab var high_workload "Excessive workload"


gen continue_teaching = h9_QE  == 8
replace continue_teaching = . if h9_QE == .
lab var continue_teaching "Choose teaching"

gen low_salary = h6a_QE == 1
replace low_salary = . if h5_QE  == . & h6a_QE == .
lab var low_salary "Insufficient--- salary"

*Convert the reported salary with a fixed divisor of 6995.912 (presumably the local-currency-per-USD exchange rate; confirm). 
gen salary_usd = j1_QE / 6995.912
lab var salary_usd "Monthly salary (USD)"

*Export means by gender plus a total column. Forward slash in the path so it also resolves on macOS/Linux. 
iebaltab no_students permanent_teacher  low_recognition high_workload low_salary salary_usd continue_teaching ///
		, total  grpvar(female) savetex("$tables/teacher_characteristics_by_gender.tex") replace  ///
		 rowvarlabels   
		
	
****************************************************
***   A.9 Baseline balance on key variables      ***
****************************************************	
*Load student-level data
use "$final/student_all_years.dta", clear

**Merge in the Ebola data (forward slash so the path also resolves on macOS/Linux). 
*Schools without a match keep ebola == . after the merge, which is then recoded to zero. 
merge m:1 code_ecole using "$final/Ebola by prefecture with school code.dta", gen(merge_ebola)
lab var no_ebola "No Ebola"
lab var ebola "No. deaths, Ebola"
replace ebola = 0 if ebola == .
	
* The following loop creates grade-specific copies of each student variable (set equal to missing if not the right grade), 
* so that all grades can be passed to a single iebaltab command. 	
	
forvalues i = 2/3 { 
	gen lang_g`i' = langirtscore
	replace lang_g`i' = . if grade ~= `i'
	lab var lang_g`i' "Language"
	gen math_g`i' = mathirtscore
	replace math_g`i' = . if grade ~= `i'
	lab var math_g`i' "Math"
	gen age_g`i' = student_age
	replace age_g`i' = . if grade ~= `i'
	lab var age_g`i' "Age"	
	gen female_g`i' = female
	replace female_g`i' = . if grade ~= `i'
	lab var female_g`i' "Female"
	}
 	
*Label some of the school-level characteristics. 	
lab var students_per_school_gr2round0 "No. grade 2 students"
lab var students_per_school_gr3round0 "No. grade 3 students"

lab var HT_female "Headteacher female"
lab var HT_age "Headteacher age"		
	
	
*Create Table (student-level data, baseline period only, standard errors clustered by school)
iebaltab math_g2 lang_g2 female_g2 age_g2 math_g3 lang_g3 female_g3 age_g3 /// Note: We manually add a row indicating that it refers to Grade 2s vs Grade3s. 
		if period == 0,  grpvar(treat) covariates(i.strat) savetex("$tables/balance.tex") replace  vce(cluster code_ecole) ///
		 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels     control(0)  

	 
		 
		 
*Collapse data to the school level in order to construct school-level balance statistics. 
*preserve/restore keeps the student-level data in memory for the sections that follow. 
preserve
	duplicates drop code_ecole, force		
 	*This table gets combined with the table above, so it is only one table for all the balance statistics.  
	iebaltab students_per_school_gr2round0 students_per_school_gr3round0  HT_female HT_age	ebola ///
			,  grpvar(treat) covariates(i.strat) savetex("$tables/balance_school.tex") replace  ///
			 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels    control(0) 
restore		 
		 
****************************************************
***   A.10 Attrition							 ***
****************************************************	
*Continues with the dataset that is in memory from A.9.	

*The regressions are run on one row per school, so the student-level data are preserved first and restored afterwards. 
preserve
	duplicates drop code_ecole, force
	*Indicator for schools that have baseline data for grade 2 or grade 3
	generate base_data = (gr2round0 | gr3round0)

	*One regression per grade-round indicator, on schools with baseline data. 
	*Each stored estimate carries the p-value of the T1 = T2 test and the control-group mean of the outcome. 
	foreach outcome of varlist gr2round1 gr3round1 gr4round1 gr3round2 gr4round2 {
		quietly regress `outcome' T1 T2 i.strat if base_data == 1, vce(robust)
		test T1 = T2
		local pval `r(p)'
		quietly summarize `outcome' if base_data == 1 & treat == 0
		local ctrl_mean `r(mean)'
		eststo `outcome', addscalars(p1 `pval' m `ctrl_mean')
	}

	*Export: three midline columns followed by two endline columns
	esttab gr2round1 gr3round1 gr4round1 gr3round2 gr4round2 ///
			using "$tables/attrition.tex", replace ///
			tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
			keep(T1 T2) ///
			collabels(none) ///
			stats(m N r2 p1,  fmt(3 0 3 3) labels("Control mean" "Observations" "R-squared" "Test:In-Kind=Recognition")) ///
			mgroups("Midline" "Endline" , pattern(1 0 0 1 0) ///
			prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
			mtitles("Grade 2" "Grade 3" "Grade 4" "Grade 3" "Grade 4") ///
			nodepvars nonotes nocons star(  * 0.1  **  0.05 *** 0.01)

restore
		 
	
**************************************************************************************************************
***   A.11 and A.12 Baseline balance, restricted to sample with midline (endline) student assessment data  ***
**************************************************************************************************************
*All output paths below use forward slashes so that they also resolve on macOS/Linux. 
	
preserve 
	forvalues i = 2/3 { //set to missing if not found in midline. Do this separately for grade2 and grade3 variables. 
		foreach var of varlist math_g`i' lang_g`i' female_g`i' age_g`i' {
			replace `var' = . if gr`i'round1 ~= 1
		}
	}
*Export balance statistics. But data is now restricted sample of grade 2 (3) data that have midline data for the same grade. 	
iebaltab math_g2 lang_g2 female_g2 age_g2 math_g3 lang_g3 female_g3 age_g3 ///
		if period == 0,  grpvar(treat) covariates(i.strat) savetex("$tables/balance_also_midline.tex") replace  vce(cluster code_ecole) ///
		 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels      control(0) 

restore
	
** Then restrict the sample to schools that have school-level endline data for at least one of the grades.
iebaltab  math_g2 lang_g2 female_g2 age_g2 math_g3 lang_g3 female_g3 age_g3 /// 
		if baseline == 1 & (gr3round2 | gr4round2),  grpvar(treat) covariates(i.strat) savetex("$tables/balance_also_endline.tex") replace  vce(cluster code_ecole) ///
		 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels  	   control(0) 

*Collapse data to the school level in order to construct school-level balance statistics
preserve
	duplicates drop code_ecole, force		
 	*This table gets combined with the "balance_also_midline" above, so it is only one table for all the balance statistics.  		 
	iebaltab students_per_school_gr2round0 students_per_school_gr3round0   HT_female HT_age	ebola ///
			if (gr2round1 | gr3round1 | gr4round1),  grpvar(treat) covariates(i.strat) savetex("$tables/balance_school_also_midline.tex") replace  ///
			 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels    control(0) 

 	*This table gets combined with the "balance_also_endline" above, so it is only one table for all the balance statistics.  		 
	iebaltab students_per_school_gr2round0 students_per_school_gr3round0   HT_female HT_age	ebola ///
			if (gr3round2 | gr4round2),  grpvar(treat) covariates(i.strat) savetex("$tables/balance_school_also_endline.tex") replace  ///
			 grplabels("0 Control @ 1 In-Kind @ 2 Recognition") rowvarlabels    control(0) 
restore 		 

***********************************************************
***   A.14 Main impacts on learning, dropping outliers  ***
***********************************************************


*Load student-level data (forward slash so the path also resolves on macOS/Linux)
use "$final/student_all_years.dta", clear

	**The following lines reshape the data long so that there are two observations per individual - one for language and one for math.
	*Rename learning outcomes so can perform a reshape (suffix 1 = language, suffix 2 = math)
	ren langirtscore irt1
	ren mathirtscore irt2
	forvalues i = 2/ 3 { 
		ren base_mean_langirtscore_g`i' base_mean_g`i'_irt1
		ren base_mean_mathirtscore_g`i' base_mean_g`i'_irt2
	}
	ren base_mean_mathirtscore_g3_mis base_g3_mis
	drop base_mean_langirtscore_g3_mis //This and the above variable are exactly the same. 
	ren base_mean_mathirtscore_g2_mis base_g2_mis 
	drop base_mean_langirtscore_g2_mis //This and the above variable are exactly the same. 	
	*Create temporary ID so can reshape
	gen tempid = _n
	*Reshape the data. 
	reshape long irt base_mean_g2_irt base_mean_g3_irt, i(tempid) j(subject)
	

**Impute the missing values with the control-group mean, so that we do not lose observations when adding them as controls in the regression. 
**The _mis indicator flags the imputed rows and enters the regressions alongside the imputed variable. 		
		foreach var of varlist student_age female HT_age HT_female {
			gen `var'_mis = `var' == .
			qui sum `var' if treat== 0
			replace `var' = `r(mean)' if `var' == .
		}


eststo clear
	*Run the same regressions as for Table 1, but keeping only observations with students_per_grade <= 300. 
	*NOTE(review): the original comment described this as dropping schools with more than 300 students, 
	*but the filter is applied to students_per_grade, not students_per_school; confirm which is intended. 
	*In every column the stored "m" scalar is the control-group mean of irt at baseline (period == 0). 
	forvalues p = 1/2 { //this loops midline and endline
		qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g3_mis base_g2_mis HT_age* HT_female*  ///
			if period  == `p'  & grade > 2 & students_per_grade <= 300 [pweight = 1/students_per_school], cluster(code_ecole)		
			qui test T1 = T2
			local p1 `r(p)' 	
			qui sum irt if treat == 0 & period == 0 & gr3round`p' & gr4round`p' //restricted to control schools that also have data for both grade 3 and grade 4 in round p
			local m = `r(mean)'
			eststo round`p'_full, addscalars(p1 `p1' m `m')		
		*By grade: weights are the inverse of students_per_grade rather than students_per_school
		forvalues g = 3/4 {
			qui reg irt T1 T2 i.strat i.subject female female_mis  student_age student_age_mis base_mean_* base_g3_mis base_g2_mis HT_age* HT_female*  ///
				if period  == `p'  & grade == `g' & students_per_grade <= 300  [pweight = 1/students_per_grade], cluster(code_ecole)		
				qui test T1 = T2
				local p1 `r(p)' 	
				qui sum irt if treat == 0 & period == 0 & gr`g'round`p'
				local m = `r(mean)'
				eststo round`p'_grade`g', addscalars(p1 `p1' m `m')
			}
		*By subject (1 = language, 2 = math after the reshape)
		forvalues s = 1/2 {
			qui reg irt T1 T2 i.strat  i.grade female female_mis  student_age student_age_mis base_mean_* base_g3_mis base_g2_mis HT_age* HT_female*  ///
				if period  == `p'  & subject == `s' & grade > 2 & students_per_grade <= 300  [pweight = 1/students_per_school], cluster(code_ecole)		
				qui test T1 = T2
				local p1 `r(p)' 	
				qui sum irt if treat == 0 & period == 0 & subject == `s' & grade > 2
				local m = `r(mean)'
				eststo round`p'_subject`s', addscalars(p1 `p1' m `m')
			}
	}
	
	*fmt() now lists one format per statistic (m N r2 p1); previously the fourth statistic reused the last listed format, so the output is unchanged. 
	esttab 	round1_full round2_full round1_grade3 round2_grade3 round1_grade4  round2_grade4 round1_subject1 round2_subject1 round1_subject2 round2_subject2 ///
				using "$tables/learning_by_round_and_grade_drop_outliers.tex", replace ///
				tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
				stats(m N r2 p1,  fmt(3 0 3 3) labels("Control mean" "Observations" "R-squared" "Test:In-Kind=Recognition")) ///	
				keep(T1 T2) ///
				collabels(none)	 ///
				mgroups("Full sample"	 "Grade 3"	"Grade 4"	"French"	"Math", pattern(1 0 1 0 1 0 1 0 1 0) ///
				prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
				mtitles("Mid" "End" "Mid" "End" "Mid" "End" "Mid" "End" "Mid" "End") ///
				nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								
		
	
	
	

***********************************************************
***   A.15     Impacts by teacher identity   ***
***********************************************************
*Continues with the long-format student data prepared in A.14 (irt, subject and the _mis flags are created there). 
*Three midline regressions for grades above 2: (1) base_teach_remain non-missing (0 or 1), (2) base_teach_remain == 1, (3) base_teach_remain == 0. 
		
	
	eststo clear

	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & (base_teach_remain == 1 | base_teach_remain == 0) [pweight = 1/students_per_school], cluster(code_ecole)			
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo full_base_teach_remain, addscalars(p1 `p1')		

	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & base_teach_remain == 1  [pweight = 1/students_per_school], cluster(code_ecole)			
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo base_teach_remain_yes, addscalars(p1 `p1')		
			
	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & base_teach_remain == 0  [pweight = 1/students_per_school], cluster(code_ecole)						
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo base_teach_remain_no, addscalars(p1 `p1')		
	
	
	*Export the three columns. The spelling of the column-group header is corrected ("profession"). 
	*NOTE(review): the file name, group header and column titles refer to preferred profession, 
	*whereas the sample split above is on base_teach_remain. The file name is kept so that existing 
	*LaTeX inputs still resolve; confirm whether the header and column titles should describe teacher identity instead. 
	esttab 	full_base_teach_remain base_teach_remain_yes base_teach_remain_no /// 
			using "$tables/appendix_preffered_profession.tex", replace ///
			tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
			stats(N r2 p1,  fmt(0 3 3) labels("Observations" "R-squared" "Test:In-Kind=Recognition")) ///	
			keep(T1 T2) ///
			collabels(none)	 ///
			mgroups("Preferred profession" , pattern(1 0 0) ///
			prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
			mtitles("Either" "Teacher" "Other") ///
			nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 		
	

	


***********************************************************
***   A.16 Impacts on learning--- by exposure to (log) Ebola  ***
***********************************************************
*Continues with the long-format student data prepared in A.14 (irt, subject and the _mis flags are created there). 

*Merge in the Ebola data (forward slash so the path also resolves on macOS/Linux)
merge m:1 code_ecole using "$final/Ebola by prefecture with school code.dta", gen(merge_ebola)
lab var no_ebola "No Ebola"

**Assume that if unmatched (i.e. no data on prefectures), there were no cases by end of May. A reasonable assumption. 
*mis_ebola_data flags the rows whose Ebola count was missing before it is recoded to zero. 
gen mis_ebola_data = ebola == .
replace ebola = 0 if ebola == .
replace no_ebola = 1 if no_ebola == .
*Inverse hyperbolic sine: behaves like a log transformation but is defined at zero. 
gen invhs_ebola = asinh(ebola)

*Interact with treatment (creates invhs_ebola_x_T1/T2 and ebola_x_T1/T2; the latter are used in A.17)
forvalues  i = 1/2 {
	foreach var of varlist    invhs_ebola  ebola   {
		gen `var'_x_T`i' = `var' * T`i'
	}
}	
lab var invhs_ebola "Ebola (invhs)"
lab var invhs_ebola_x_T1 "Ebola (invhs) x In-Kind"
lab var invhs_ebola_x_T2 "Ebola (invhs) x Recognition"


**Same code as Table 4 regressions, but using the log transformation of Ebola rather than total number of cases.  (inverse hyperbolic sine)
*NOTE(review): these regressions control for HT_sex* and omit base_g?_mis, whereas the A.14 and A.15 regressions 
*use HT_female* (imputed above, with a _mis flag) and include base_g?_mis. Confirm that this matches the Table 4 specification. 
*All four regressions are restricted to matched schools (merge_ebola == 3) and are estimated without a constant. 
eststo clear

qui reg irt T1 T2 invhs_ebola invhs_ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis base_mean_* HT_age* HT_sex*  ///
		 if period  == 1  & grade > 2 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo midline_interact_allgrades, addscalars(p1 `p1')

qui reg irt T1 T2 invhs_ebola invhs_ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade > 2 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact, addscalars(p1 `p1')

qui reg irt T1 T2 invhs_ebola invhs_ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade == 3 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact_gr3, addscalars(p1 `p1')
	
qui reg irt T1 T2 invhs_ebola invhs_ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade == 4 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact_gr4, addscalars(p1 `p1')
		
	*Export: one midline column followed by three endline columns (the stored p1 scalar is not reported in this table)
	esttab midline_interact_allgrades endline_interact  endline_interact_gr3 endline_interact_gr4 ///
		using "$tables/learning_by_logebola.tex", replace ///
		tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
		stats(N r2,  fmt(0 3) labels("Observations" "R-squared")) ///	
		keep( T1 T2 invhs_ebola invhs_ebola_x_T1 invhs_ebola_x_T2 ) ///
		collabels(none)	 ///
		mgroups("Midline" "Endline" , pattern(1 1 0 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
		mtitles("Full" "Full" "Grade 3" "Grade 4" ) ///
		nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								
	
	
***********************************************************
***   A.17 Impacts on learning--- by exposure to Ebola (reduced sample)   ***
***********************************************************
*Continues with the data and the ebola_x_T1/T2 interactions that are in memory from A.16. 

label variable ebola "Ebola"
label variable ebola_x_T1 "Ebola x In-Kind"
label variable ebola_x_T2 "Ebola x Recognition"

***Same as code for Table 4, but treating those 5 prefectures with no data as missing. 
*Shared pieces of the eight regressions: the control set, and the reduced-sample restriction 
*(matched schools whose Ebola count was not missing before imputation). 
local controls i.strat  i.grade i.subject female female_mis student_age student_age_mis base_mean_* HT_age* HT_sex*
local reduced merge_ebola == 3 & mis_ebola_data == 0

*Columns come in pairs: an average-effect specification followed by the specification with the Ebola interactions. 
*NOTE(review): the average-effect regressions for the full midline and endline samples include ebola, 
*while those for endline grade 3 and grade 4 do not. Reproduced as is; confirm against Table 4. 
eststo clear

*Midline, grades above 2
quietly regress irt T1 T2 ebola `controls' ///
		if period  == 1  & grade > 2 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo midline_ate_allgrades, addscalars(p1 `pval')

quietly regress irt T1 T2 ebola ebola_x_T? `controls' ///
		if period  == 1  & grade > 2 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo midline_interact_allgrades, addscalars(p1 `pval')

*Endline, grades above 2
quietly regress irt T1 T2 ebola `controls' ///
		if period  == 2  & grade > 2 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_ate, addscalars(p1 `pval')

quietly regress irt T1 T2 ebola ebola_x_T? `controls' ///
		if period  == 2  & grade > 2 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_interact, addscalars(p1 `pval')

*Endline, grade 3
quietly regress irt T1 T2 `controls' ///
		if period  == 2  & grade == 3 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_ate_gr3, addscalars(p1 `pval')

quietly regress irt T1 T2 ebola ebola_x_T? `controls' ///
		if period  == 2  & grade == 3 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_interact_gr3, addscalars(p1 `pval')

*Endline, grade 4
quietly regress irt T1 T2 `controls' ///
		if period  == 2  & grade == 4 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_ate_gr4, addscalars(p1 `pval')

quietly regress irt T1 T2 ebola ebola_x_T? `controls' ///
		if period  == 2  & grade == 4 & `reduced' [pweight = 1/students_per_school], vce(cluster code_ecole) noconstant
quietly test T1 = T2
local pval `r(p)'
eststo endline_interact_gr4, addscalars(p1 `pval')

*Export the eight columns, grouped in pairs under four headers
esttab midline_ate_allgrades midline_interact_allgrades endline_ate endline_interact  ///
		endline_ate_gr3 endline_interact_gr3 endline_ate_gr4 endline_interact_gr4 ///
		using "$tables/learning_by_ebola_reduced_sample.tex", replace ///
		tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
		stats(N r2,  fmt(0 3) labels("Observations" "R-squared")) ///
		keep(T1 T2 ebola ebola_x_T1 ebola_x_T2) ///
		collabels(none)	 ///
		mgroups("Midline" "Endline" "Endline Grade 3" "Endline Grade 4" , pattern(1 0 1 0 1 0 1 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
		mtitles("" "" "" "" "" "" "" "") ///
		nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01)

***********************************************************
***   Figure A.4. Descriptive statistics: distribution of Ebola.    ***
***********************************************************
*Continues with the data in memory from A.16/A.17. 

	
	lab var ebola "Known deaths from Ebola"
	*tag_pref numbers the observations within each prefecture; keeping tag_pref == 1 plots one observation per prefecture. 
	bys prefecture_ebola: gen tag_pref = _n //This allows me to plot it at a prefecture level (when tag_pref == 1)
	histogram ebola if tag_pref == 1, width(3) frac
	*Forward slash so that the export path also resolves on macOS/Linux
	graph export "$figs/ebola_distribution_may2014.png", replace
		
	*Restore the short label used by the tables above
	lab var ebola "Ebola"

********************
**** Done!		****
********************
